from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the Drebin feature table. low_memory=False makes pandas infer each
# column's dtype from the whole file rather than chunk by chunk, which is the
# remedy suggested by the mixed-type DtypeWarning raised for column 92.
drebin_data = pd.read_csv('drebin.csv', low_memory=False)
/tmp/ipykernel_68716/899786895.py:1: DtypeWarning: Columns (92) have mixed types. Specify dtype option on import or set low_memory=False.
drebin_data = pd.read_csv('drebin.csv')
Exploratory Data Analysis (EDA)
1. Descriptive Statistics:
Summary Statistics:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns; the quartiles are spelled out instead of left implicit.
numerical_summary = drebin_data.describe(percentiles=[0.25, 0.5, 0.75])
print(numerical_summary)
transact onServiceConnected bindService attachInterface \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.426443 0.446595 0.442671 0.413208
std 0.494576 0.497156 0.496719 0.492426
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 1.000000 1.000000 1.000000 1.000000
max 1.000000 1.000000 1.000000 1.000000
ServiceConnection android.os.Binder SEND_SMS \
count 15036.000000 15036.000000 15036.000000
mean 0.444932 0.486898 0.236632
std 0.496975 0.499845 0.425029
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 1.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000
Ljava.lang.Class.getCanonicalName Ljava.lang.Class.getMethods \
count 15036.000000 15036.000000
mean 0.330806 0.282389
std 0.470519 0.450177
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 1.000000 1.000000
max 1.000000 1.000000
Ljava.lang.Class.cast Ljava.net.URLDecoder \
count 15036.000000 15036.000000
mean 0.312583 0.408553
std 0.463561 0.491583
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 1.000000 1.000000
max 1.000000 1.000000
android.content.pm.Signature android.telephony.SmsManager \
count 15036.000000 15036.000000
mean 0.356145 0.185222
std 0.478875 0.388491
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 1.000000 0.000000
max 1.000000 1.000000
READ_PHONE_STATE getBinder ClassLoader \
count 15036.000000 15036.000000 15036.000000
mean 0.632416 0.188547 0.520617
std 0.482163 0.391162 0.499591
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 1.000000 0.000000 1.000000
75% 1.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
Landroid.content.Context.registerReceiver Ljava.lang.Class.getField \
count 15036.000000 15036.000000
mean 0.503458 0.443536
std 0.500005 0.496818
min 0.000000 0.000000
25% 0.000000 0.000000
50% 1.000000 0.000000
75% 1.000000 1.000000
max 1.000000 1.000000
Landroid.content.Context.unregisterReceiver GET_ACCOUNTS \
count 15036.000000 15036.000000
mean 0.426377 0.298750
std 0.494566 0.457725
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 1.000000 1.000000
max 1.000000 1.000000
RECEIVE_SMS Ljava.lang.Class.getDeclaredField READ_SMS \
count 15036.000000 15036.000000 15036.000000
mean 0.187151 0.477055 0.186752
std 0.390046 0.499490 0.389725
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000
getCallingUid Ljavax.crypto.spec.SecretKeySpec \
count 15036.000000 15036.000000
mean 0.121641 0.431431
std 0.326882 0.495292
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 1.000000
max 1.000000 1.000000
android.intent.action.BOOT_COMPLETED USE_CREDENTIALS MANAGE_ACCOUNTS \
count 15036.000000 15036.000000 15036.000000
mean 0.449122 0.101091 0.104084
std 0.497421 0.301459 0.305379
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 1.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
android.content.pm.PackageInfo KeySpec \
count 15036.000000 15036.000000
mean 0.670059 0.472799
std 0.470207 0.499276
min 0.000000 0.000000
25% 0.000000 0.000000
50% 1.000000 0.000000
75% 1.000000 1.000000
max 1.000000 1.000000
TelephonyManager.getLine1Number DexClassLoader HttpGet.init \
count 15036.000000 15036.000000 15036.000000
mean 0.247739 0.203778 0.532389
std 0.431714 0.402819 0.498966
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 1.000000
75% 0.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
SecretKey Ljava.lang.Class.getMethod System.loadLibrary \
count 15036.000000 15036.000000 15036.000000
mean 0.460229 0.578412 0.365656
std 0.498432 0.493830 0.481630
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 1.000000 0.000000
75% 1.000000 1.000000 1.000000
max 1.000000 1.000000 1.000000
android.intent.action.SEND Ljavax.crypto.Cipher WRITE_SMS \
count 15036.000000 15036.000000 15036.000000
mean 0.111399 0.435555 0.113195
std 0.314636 0.495846 0.316842
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000
READ_SYNC_SETTINGS AUTHENTICATE_ACCOUNTS \
count 15036.000000 15036.000000
mean 0.083267 0.064379
std 0.276294 0.245435
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
android.telephony.gsm.SmsManager WRITE_HISTORY_BOOKMARKS \
count 15036.000000 15036.000000
mean 0.044294 0.079343
std 0.205754 0.270282
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
TelephonyManager.getSubscriberId mount INSTALL_PACKAGES \
count 15036.000000 15036.000000 15036.000000
mean 0.214884 0.597433 0.069965
std 0.410756 0.490431 0.255097
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 1.000000 0.000000
75% 0.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000
Runtime.getRuntime CAMERA Ljava.lang.Object.getClass \
count 15036.000000 15036.000000 15036.000000
mean 0.438880 0.136605 0.727787
std 0.496267 0.343442 0.445114
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 1.000000
75% 1.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
WRITE_SYNC_SETTINGS READ_HISTORY_BOOKMARKS Ljava.lang.Class.forName \
count 15036.000000 15036.000000 15036.000000
mean 0.077082 0.092910 0.620444
std 0.266730 0.290316 0.485293
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 1.000000
75% 0.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
INTERNET android.intent.action.PACKAGE_REPLACED Binder \
count 15036.000000 15036.00000 15036.000000
mean 0.871974 0.07482 0.790902
std 0.334130 0.26311 0.406678
min 0.000000 0.00000 0.000000
25% 1.000000 0.00000 1.000000
50% 1.000000 0.00000 1.000000
75% 1.000000 0.00000 1.000000
max 1.000000 1.00000 1.000000
android.intent.action.SEND_MULTIPLE RECORD_AUDIO IBinder \
count 15036.000000 15036.000000 15036.000000
mean 0.048417 0.090250 0.783985
std 0.214653 0.286549 0.411538
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 1.000000
50% 0.000000 0.000000 1.000000
75% 0.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
android.os.IBinder createSubprocess NFC \
count 15036.000000 15036.000000 15036.000000
mean 0.783919 0.020684 0.041168
std 0.411584 0.142328 0.198685
min 0.000000 0.000000 0.000000
25% 1.000000 0.000000 0.000000
50% 1.000000 0.000000 0.000000
75% 1.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
ACCESS_LOCATION_EXTRA_COMMANDS URLClassLoader WRITE_APN_SETTINGS \
count 15036.000000 15036.000000 15036.000000
mean 0.051809 0.047819 0.047486
std 0.221649 0.213389 0.212683
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
abortBroadcast BIND_REMOTEVIEWS android.intent.action.TIME_SET \
count 15036.000000 15036.000000 15036.000000
mean 0.112397 0.037244 0.044959
std 0.315865 0.189365 0.207220
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
READ_PROFILE TelephonyManager.getDeviceId MODIFY_AUDIO_SETTINGS \
count 15036.000000 15036.00000 15036.000000
mean 0.034916 0.53904 0.065044
std 0.183574 0.49849 0.246611
min 0.000000 0.00000 0.000000
25% 0.000000 0.00000 0.000000
50% 0.000000 1.00000 0.000000
75% 0.000000 1.00000 0.000000
max 1.000000 1.00000 1.000000
getCallingPid READ_SYNC_STATS BROADCAST_STICKY \
count 15036.000000 15036.000000 15036.000000
mean 0.039505 0.034584 0.051144
std 0.194800 0.182729 0.220299
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
android.intent.action.PACKAGE_REMOVED \
count 15036.000000
mean 0.064312
std 0.245317
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
android.intent.action.TIMEZONE_CHANGED WAKE_LOCK \
count 15036.000000 15036.000000
mean 0.030128 0.487231
std 0.170944 0.499854
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 1.000000
max 1.000000 1.000000
RECEIVE_BOOT_COMPLETED RESTART_PACKAGES Ljava.lang.Class.getPackage \
count 15036.000000 15036.000000 15036.000000
mean 0.384344 0.079742 0.238428
std 0.486456 0.270902 0.426136
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 1.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
chmod Ljava.lang.Class.getDeclaredClasses \
count 15036.000000 15036.00000
mean 0.112796 0.02760
std 0.316354 0.16383
min 0.000000 0.00000
25% 0.000000 0.00000
50% 0.000000 0.00000
75% 0.000000 0.00000
max 1.000000 1.00000
android.intent.action.ACTION_POWER_DISCONNECTED \
count 15036.00000
mean 0.02760
std 0.16383
min 0.00000
25% 0.00000
50% 0.00000
75% 0.00000
max 1.00000
android.intent.action.PACKAGE_ADDED PathClassLoader \
count 15036.00000 15036.000000
mean 0.07482 0.025738
std 0.26311 0.158359
min 0.00000 0.000000
25% 0.00000 0.000000
50% 0.00000 0.000000
75% 0.00000 0.000000
max 1.00000 1.000000
TelephonyManager.getSimSerialNumber Runtime.load \
count 15036.000000 15036.000000
mean 0.128492 0.019620
std 0.334648 0.138694
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
TelephonyManager.getCallState BLUETOOTH READ_CALENDAR \
count 15036.000000 15036.000000 15036.000000
mean 0.077614 0.079343 0.039439
std 0.267572 0.270282 0.194643
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
READ_CALL_LOG SUBSCRIBED_FEEDS_WRITE READ_EXTERNAL_STORAGE \
count 15036.000000 15036.000000 15036.000000
mean 0.016228 0.016161 0.105547
std 0.126354 0.126100 0.307267
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
sendMultipartTextMessage PackageInstaller VIBRATE remount \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.017957 0.029662 0.362663 0.025140
std 0.132799 0.169659 0.480785 0.156554
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
android.intent.action.ACTION_SHUTDOWN sendDataMessage \
count 15036.000000 15036.000000
mean 0.019620 0.019886
std 0.138694 0.139612
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
ACCESS_NETWORK_STATE chown HttpPost.init \
count 15036.000000 15036.000000 15036.000000
mean 0.724195 0.016959 0.568569
std 0.446934 0.129123 0.495292
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 1.000000 0.000000 1.000000
75% 1.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
Ljava.lang.Class.getClasses SUBSCRIBED_FEEDS_READ \
count 15036.000000 15036.000000
mean 0.045491 0.016427
std 0.208385 0.127116
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
TelephonyManager.isNetworkRoaming CHANGE_WIFI_MULTICAST_STATE \
count 15036.000000 15036.000000
mean 0.052075 0.012503
std 0.222186 0.111121
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
WRITE_CALENDAR android.intent.action.PACKAGE_DATA_CLEARED \
count 15036.000000 15036.000000
mean 0.035581 0.010641
std 0.185250 0.102609
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
MASTER_CLEAR HttpUriRequest UPDATE_DEVICE_STATS WRITE_CALL_LOG \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.011971 0.647646 0.014632 0.009710
std 0.108760 0.477720 0.120077 0.098063
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 1.000000 0.000000 0.000000
75% 0.000000 1.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
DELETE_PACKAGES GET_TASKS GLOBAL_SEARCH DELETE_CACHE_FILES \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.027534 0.176044 0.027999 0.009910
std 0.163639 0.380870 0.164977 0.099056
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
WRITE_USER_DICTIONARY android.intent.action.PACKAGE_CHANGED \
count 15036.000000 15036.000000
mean 0.008114 0.019354
std 0.089714 0.137769
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
android.intent.action.NEW_OUTGOING_CALL REORDER_TASKS WRITE_PROFILE \
count 15036.000000 15036.000000 15036.000000
mean 0.018156 0.012703 0.007249
std 0.133521 0.111992 0.084836
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
SET_WALLPAPER BIND_INPUT_METHOD divideMessage READ_SOCIAL_STREAM \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.067837 0.010575 0.036379 0.006385
std 0.251475 0.102291 0.187238 0.079651
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
READ_USER_DICTIONARY PROCESS_OUTGOING_CALLS CALL_PRIVILEGED \
count 15036.000000 15036.000000 15036.000000
mean 0.006983 0.024940 0.009045
std 0.083276 0.155948 0.094677
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
Runtime.exec BIND_WALLPAPER RECEIVE_WAP_PUSH DUMP \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.310122 0.031790 0.016494 0.006717
std 0.462559 0.175447 0.127369 0.081686
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 1.000000 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
BATTERY_STATS ACCESS_COARSE_LOCATION SET_TIME \
count 15036.000000 15036.000000 15036.000000
mean 0.022612 0.282721 0.005653
std 0.148669 0.450337 0.074977
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000
android.intent.action.SENDTO WRITE_SOCIAL_STREAM WRITE_SETTINGS \
count 15036.000000 15036.000000 15036.000000
mean 0.019021 0.004922 0.154496
std 0.136603 0.069983 0.361435
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
REBOOT BLUETOOTH_ADMIN TelephonyManager.getNetworkOperator \
count 15036.000000 15036.000000 15036.000000
mean 0.014366 0.051875 0.480713
std 0.118996 0.221783 0.499644
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 1.000000
max 1.000000 1.000000 1.000000
/system/bin MessengerService BIND_DEVICE_ADMIN WRITE_GSERVICES \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.142990 0.003990 0.014632 0.004256
std 0.350074 0.063046 0.120077 0.065105
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
IRemoteService KILL_BACKGROUND_PROCESSES SET_ALARM \
count 15036.000000 15036.000000 15036.000000
mean 0.008912 0.040636 0.025339
std 0.093985 0.197452 0.157158
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
ACCOUNT_MANAGER /system/app android.intent.action.CALL \
count 15036.000000 15036.000000 15036.000000
mean 0.005387 0.153166 0.004789
std 0.073201 0.360159 0.069035
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
STATUS_BAR TelephonyManager.getSimOperator PERSISTENT_ACTIVITY \
count 15036.000000 15036.000000 15036.00000
mean 0.029529 0.229250 0.00838
std 0.169290 0.420364 0.09116
min 0.000000 0.000000 0.00000
25% 0.000000 0.000000 0.00000
50% 0.000000 0.000000 0.00000
75% 0.000000 0.000000 0.00000
max 1.000000 1.000000 1.00000
CHANGE_NETWORK_STATE onBind Process.start \
count 15036.000000 15036.000000 15036.000000
mean 0.091048 0.686419 0.002727
std 0.287687 0.463964 0.052149
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 1.000000 0.000000
75% 0.000000 1.000000 0.000000
max 1.000000 1.000000 1.000000
android.intent.action.SCREEN_ON Context.bindService RECEIVE_MMS \
count 15036.000000 15036.000000 15036.000000
mean 0.008446 0.002527 0.023743
std 0.091518 0.050210 0.152253
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
SET_TIME_ZONE android.intent.action.BATTERY_OKAY \
count 15036.000000 15036.000000
mean 0.003059 0.005520
std 0.055228 0.074094
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
CONTROL_LOCATION_UPDATES BROADCAST_WAP_PUSH \
count 15036.000000 15036.000000
mean 0.004789 0.006518
std 0.069035 0.080471
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
BIND_ACCESSIBILITY_SERVICE ADD_VOICEMAIL CALL_PHONE \
count 15036.000000 15036.000000 15036.000000
mean 0.002793 0.001995 0.114326
std 0.052780 0.044625 0.318217
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
ProcessBuilder BIND_APPWIDGET FLASHLIGHT READ_LOGS \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.087922 0.004323 0.026536 0.078478
std 0.283191 0.065609 0.160729 0.268932
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
Ljava.lang.Class.getResource defineClass SET_PROCESS_LIMIT \
count 15036.000000 15036.000000 15036.000000
mean 0.385142 0.012104 0.002195
std 0.486645 0.109355 0.046798
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 1.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
android.intent.action.PACKAGE_RESTARTED MOUNT_UNMOUNT_FILESYSTEMS \
count 15036.000000 15036.000000
mean 0.007715 0.033320
std 0.087497 0.179477
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
BIND_TEXT_SERVICE INSTALL_LOCATION_PROVIDER \
count 15036.000000 15036.000000
mean 0.001530 0.002062
std 0.039082 0.045361
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
android.intent.action.CALL_BUTTON android.intent.action.SCREEN_OFF \
count 15036.000000 15036.000000
mean 0.002461 0.007515
std 0.049547 0.086367
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
findClass SYSTEM_ALERT_WINDOW MOUNT_FORMAT_FILESYSTEMS \
count 15036.000000 15036.000000 15036.000000
mean 0.096502 0.073490 0.002660
std 0.295288 0.260948 0.051511
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
CHANGE_CONFIGURATION CLEAR_APP_USER_DATA intent.action.RUN \
count 15036.000000 15036.000000 15036.000000
mean 0.025672 0.002926 0.002926
std 0.158159 0.054018 0.054018
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
android.intent.action.SET_WALLPAPER CHANGE_WIFI_STATE \
count 15036.000000 15036.000000
mean 0.016228 0.161878
std 0.126354 0.368351
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
READ_FRAME_BUFFER ACCESS_SURFACE_FLINGER Runtime.loadLibrary \
count 15036.000000 15036.000000 15036.000000
mean 0.008446 0.002860 0.000998
std 0.091518 0.053402 0.031570
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
BROADCAST_SMS EXPAND_STATUS_BAR INTERNAL_SYSTEM_WINDOW \
count 15036.000000 15036.000000 15036.000000
mean 0.009577 0.020817 0.003924
std 0.097396 0.142775 0.062520
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
android.intent.action.BATTERY_LOW SET_ACTIVITY_WATCHER \
count 15036.000000 15036.000000
mean 0.007050 0.001064
std 0.083669 0.032604
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
WRITE_CONTACTS android.intent.action.ACTION_POWER_CONNECTED \
count 15036.000000 15036.000000
mean 0.106810 0.059989
std 0.308882 0.237475
min 0.000000 0.000000
25% 0.000000 0.000000
50% 0.000000 0.000000
75% 0.000000 0.000000
max 1.000000 1.000000
BIND_VPN_SERVICE DISABLE_KEYGUARD ACCESS_MOCK_LOCATION \
count 15036.000000 15036.000000 15036.000000
mean 0.000532 0.077015 0.013301
std 0.023061 0.266624 0.114566
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
GET_PACKAGE_SIZE MODIFY_PHONE_STATE CHANGE_COMPONENT_ENABLED_STATE \
count 15036.000000 15036.000000 15036.000000
mean 0.017691 0.019819 0.009511
std 0.131830 0.139383 0.097060
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
CLEAR_APP_CACHE SET_ORIENTATION READ_CONTACTS DEVICE_POWER \
count 15036.000000 15036.000000 15036.000000 15036.000000
mean 0.014299 0.007050 0.233307 0.017425
std 0.118724 0.083669 0.422950 0.130852
min 0.000000 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000 0.000000
75% 0.000000 0.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000 1.000000
HARDWARE_TEST ACCESS_WIFI_STATE WRITE_EXTERNAL_STORAGE \
count 15036.000000 15036.000000 15036.000000
mean 0.004256 0.434424 0.666135
std 0.065105 0.495698 0.471608
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 1.000000
75% 0.000000 1.000000 1.000000
max 1.000000 1.000000 1.000000
ACCESS_FINE_LOCATION SET_WALLPAPER_HINTS SET_PREFERRED_APPLICATIONS \
count 15036.000000 15036.000000 15036.000000
mean 0.290835 0.016028 0.007050
std 0.454163 0.125588 0.083669
min 0.000000 0.000000 0.000000
25% 0.000000 0.000000 0.000000
50% 0.000000 0.000000 0.000000
75% 1.000000 0.000000 0.000000
max 1.000000 1.000000 1.000000
WRITE_SECURE_SETTINGS
count 15036.000000
mean 0.044959
std 0.207220
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 1.000000
Unique Values in Categorical Columns:
# List the object-typed columns and print the distinct values seen in each
categorical_columns = list(drebin_data.select_dtypes(include='object').columns)
for col, column_values in drebin_data[categorical_columns].items():
    unique_values = column_values.unique()
    print(f"Unique values in column '{col}': {unique_values}")
Unique values in column 'TelephonyManager.getSimCountryIso': ['0' '1' '?' 1 0] Unique values in column 'class': ['S' 'B']
2. Data Visualization:
Histograms for Numerical Columns:
# Draw one 20-bin histogram per int64 column, showing each figure in turn
numerical_columns = list(drebin_data.select_dtypes(include='int64').columns)
for col in numerical_columns:
    ax = drebin_data[col].hist(bins=20)
    ax.set_xlabel(col)
    ax.set_ylabel('Frequency')
    ax.set_title(f'Histogram of {col}')
    plt.show()
Bar Plot for Categorical Columns:
# Plot the frequency of every distinct value for each categorical column
for col in categorical_columns:
    value_frequencies = drebin_data[col].value_counts()
    ax = value_frequencies.plot.bar()
    ax.set_xlabel(col)
    ax.set_ylabel('Count')
    ax.set_title(f'Bar plot of {col}')
    plt.show()
Correlation Heatmap:
# Build a numeric copy for the correlation analysis instead of editing
# drebin_data in place: replacing 'S' and 'B' with NaN across the whole frame
# erased the 'class' labels that the later cells rely on.
# errors='coerce' turns every non-numeric entry (such as '?') into NaN.
corr_data = drebin_data.drop(columns='class').apply(pd.to_numeric, errors='coerce')
# Encode the two class labels as 0/1 so the target takes part in the matrix
corr_data['class'] = drebin_data['class'].map({'B': 0, 'S': 1})
# Correlation heatmap for numerical columns
correlation_matrix = corr_data.corr()
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap')
plt.show()
3. Understanding Target Variable:
Class Distribution:
# Count the samples per label in the 'class' column (treated as the target)
target_labels = drebin_data['class']
class_distribution = target_labels.value_counts()
print(class_distribution)
class B 9476 S 5560 Name: count, dtype: int64
# Bar chart of the per-class sample counts computed above
ax = class_distribution.plot.bar()
ax.set_xlabel('Class')
ax.set_ylabel('Count')
ax.set_title('Class Distribution')
plt.show()
# Identify columns with missing values
columns_with_missing = drebin_data.columns[drebin_data.isnull().any()].tolist()
print("Columns with missing values:", columns_with_missing)
# Impute: column mean for numeric columns, most frequent value otherwise.
for col in columns_with_missing:
    if pd.api.types.is_numeric_dtype(drebin_data[col]):
        fill_value = drebin_data[col].mean()
    else:
        modes = drebin_data[col].mode()
        if modes.empty:
            # Column is entirely missing, so there is no mode to impute from
            continue
        fill_value = modes[0]
    # Assign the result back: fillna(inplace=True) on a selected column is
    # chained assignment and does not update the frame under Copy-on-Write.
    drebin_data[col] = drebin_data[col].fillna(fill_value)
# Check if missing values have been handled
print("Columns with missing values after handling:", drebin_data.columns[drebin_data.isnull().any()].tolist())
Columns with missing values: [] Columns with missing values after handling: []
# Check and list categorical columns for potential encoding
categorical_columns = drebin_data.select_dtypes(include='object').columns.tolist()
print("Categorical columns:", categorical_columns)
# One-hot encode the categorical feature columns only. 'class' is the target
# (see the feature/target split), so it is kept as a single label column
# instead of being expanded into indicator columns.
feature_categorical_columns = [col for col in categorical_columns if col != 'class']
drebin_data_encoded = pd.get_dummies(drebin_data, columns=feature_categorical_columns)
Categorical columns: ['TelephonyManager.getSimCountryIso', 'class']
# Partition the column names by dtype: numeric versus everything else
numeric_columns = list(drebin_data.select_dtypes(include=[np.number]).columns)
non_numeric_columns = list(drebin_data.select_dtypes(exclude=[np.number]).columns)
# Missing entries in non-numeric columns become the literal 'unknown'
non_numeric_part = drebin_data[non_numeric_columns]
drebin_data[non_numeric_columns] = non_numeric_part.fillna('unknown')
# Missing entries in numeric columns become that column's own mean
numeric_part = drebin_data[numeric_columns]
drebin_data[numeric_columns] = numeric_part.fillna(numeric_part.mean())
# Split features and target variable
X = drebin_data.drop('class', axis=1)
y = drebin_data['class']
# This column mixes integers, digit strings and '?'. Normalise everything to
# str, then map '?' to 'unknown'. The result is assigned back rather than
# using replace(inplace=True) on a selected column, which is chained
# assignment and does not update X under Copy-on-Write.
sim_country_col = 'TelephonyManager.getSimCountryIso'
X[sim_country_col] = X[sim_country_col].astype(str).replace('?', 'unknown')
# Encode categorical column as integer codes
label_encoder = LabelEncoder()
X[sim_country_col] = label_encoder.fit_transform(X[sim_country_col])
# Split into train and test sets (80/20, seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Models to compare, as (display name, estimator) pairs. The random forest is
# seeded like the train/test split so that reported scores are reproducible.
models = [
    ('Logistic Regression', LogisticRegression(max_iter=1000)),
    ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('SVM', SVC())
]
# Test-set accuracy of each model, in the same order as `models`
accuracy_scores = []
for name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)  # Collect accuracy scores for each model
    print(f"Classification Report for {name}:")
    print(classification_report(y_test, y_pred))
Classification Report for Logistic Regression:
precision recall f1-score support
B 0.97 0.98 0.98 1863
S 0.98 0.96 0.97 1145
accuracy 0.97 3008
macro avg 0.97 0.97 0.97 3008
weighted avg 0.97 0.97 0.97 3008
Classification Report for Random Forest:
precision recall f1-score support
B 0.99 1.00 0.99 1863
S 0.99 0.98 0.99 1145
accuracy 0.99 3008
macro avg 0.99 0.99 0.99 3008
weighted avg 0.99 0.99 0.99 3008
Classification Report for SVM:
precision recall f1-score support
B 0.98 0.99 0.98 1863
S 0.99 0.96 0.97 1145
accuracy 0.98 3008
macro avg 0.98 0.98 0.98 3008
weighted avg 0.98 0.98 0.98 3008
# Take the bar labels from the models list; model_names was used below
# without ever being defined, which raised a NameError.
model_names = [name for name, _ in models]
# Create an array of indices for the models
x = np.arange(len(model_names))
# Plotting the bar chart
plt.figure(figsize=(8, 6))
bars = plt.bar(x, accuracy_scores, color=['blue', 'green', 'orange'])
plt.xlabel('Models')
plt.ylabel('Accuracy')
plt.title('Accuracy of Different Models')
plt.xticks(x, model_names)  # Set model names as x-axis labels
plt.ylim(0.95, 1.0)  # Set y-axis limits for better visualization
# Display each score just below the top edge of its bar
for bar, score in zip(bars, accuracy_scores):
    plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() - 0.01, f'{score:.4f}',
             ha='center', va='bottom', color='black', fontweight='bold')
plt.show()
Conclusion
Let's delve into the strengths and weaknesses of each approach and make a recommendation based on the analysis:
Logistic Regression:
Strengths:
- Good interpretability and simplicity.
- Fast training and predictions.
- Less prone to overfitting due to its simplicity.
Weaknesses:
- Assumes a linear relationship between features and the target.
- Limited capability to capture complex patterns in data.
Recommendation: Logistic Regression performs well with decent accuracy, precision, and recall. However, it might not capture complex relationships in the data.
Random Forest:
Strengths:
- Handles non-linear relationships well.
- Less prone to overfitting due to ensemble learning (multiple decision trees).
- Works well without extensive hyperparameter tuning.
Weaknesses:
- Computational complexity, especially with a large number of trees.
- Can be challenging to interpret compared to simpler models like Logistic Regression.
Recommendation: Random Forest demonstrates high accuracy and robustness in handling complex data. It's a strong choice, but its interpretability might be a concern.
Support Vector Machine (SVM):
Strengths:
- Effective in high-dimensional spaces.
- Versatile due to different kernel functions.
- Robust against overfitting when appropriate kernel and regularization are chosen.
Weaknesses:
- Computationally intensive, especially with large datasets.
- Sensitive to the choice of kernel and its parameters.
Recommendation: SVMs provide good accuracy and flexibility in capturing complex relationships. However, their computational intensity might limit their scalability.
Conclusion & Recommendation:
Based on the analysis:
- Random Forest exhibits the highest accuracy and robustness among the three models.
- Logistic Regression is simpler and interpretable but might not capture complex relationships.
- SVM performs well but might be computationally intensive.
Given the trade-offs, Random Forest is recommended for its excellent performance and capability to handle complex relationships. However, if interpretability is a crucial factor or computational resources are limited, Logistic Regression could be considered as an alternative.